{
 "cells": [
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# RAG using Meta AI Llama-3.2\n",
    "\n",
    "\n",
    "<img src=\"./resources/rag_architecture.png\" width=800px>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "import nest_asyncio\n",
    "from IPython.display import Markdown, display\n",
    "\n",
    "from llama_index.core import Settings\n",
    "from llama_index.llms.ollama import Ollama\n",
    "from llama_index.core import PromptTemplate\n",
    "from llama_index.embeddings.huggingface import HuggingFaceEmbedding\n",
    "from llama_index.core import VectorStoreIndex, ServiceContext, SimpleDirectoryReader, StorageContext\n",
    "from llama_index.core.postprocessor import SentenceTransformerRerank\n",
    "from llama_index.vector_stores.qdrant import QdrantVectorStore\n",
    "from llama_index.core import Settings\n",
    "import qdrant_client"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# allows nested access to the event loop\n",
    "nest_asyncio.apply()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# add your documents in this directory, you can drag & drop\n",
    "input_dir_path = './docs'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "collection_name=\"chat_with_docs\"\n",
    "\n",
    "client = qdrant_client.QdrantClient(\n",
    "    host=\"localhost\",\n",
    "    port=6333\n",
    ")\n",
    "\n",
    "def create_index(documents):\n",
    "    vector_store = QdrantVectorStore(client=client, collection_name=collection_name)\n",
    "    storage_context = StorageContext.from_defaults(vector_store=vector_store)\n",
    "    index = VectorStoreIndex.from_documents(\n",
    "        documents,\n",
    "        storage_context=storage_context,\n",
    "    )\n",
    "    return index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "# setup llm & embedding model and reranker\n",
    "llm=Ollama(model=\"llama3.2:1b\", request_timeout=120.0)\n",
    "embed_model = HuggingFaceEmbedding( model_name=\"BAAI/bge-large-en-v1.5\", trust_remote_code=True)\n",
    "rerank = SentenceTransformerRerank(\n",
    "    model=\"cross-encoder/ms-marco-MiniLM-L-2-v2\", top_n=3\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Parsing nodes: 100%|██████████| 32/32 [00:00<00:00, 369.47it/s]\n",
      "Generating embeddings: 100%|██████████| 45/45 [00:25<00:00,  1.77it/s]\n"
     ]
    }
   ],
   "source": [
    "# load data\n",
    "loader = SimpleDirectoryReader(\n",
    "            input_dir = input_dir_path,\n",
    "            required_exts=[\".pdf\"],\n",
    "            recursive=True\n",
    "        )\n",
    "docs = loader.load_data()\n",
    "\n",
    "# Creating an index over loaded data\n",
    "Settings.embed_model = embed_model\n",
    "try:\n",
    "    index = create_index(docs)\n",
    "    print('Using Qdrant collection')\n",
    "except:\n",
    "    index = VectorStoreIndex.from_documents(docs, show_progress=True)\n",
    "\n",
    "# Create the query engine, where we use a cohere reranker on the fetched nodes\n",
    "Settings.llm = llm\n",
    "query_engine = index.as_query_engine(\n",
    "    similarity_top_k=10, node_postprocessors=[rerank]\n",
    ")\n",
    "\n",
    "# ====== Customise prompt template ======\n",
    "qa_prompt_tmpl_str = (\n",
    "\"Context information is below.\\n\"\n",
    "\"---------------------\\n\"\n",
    "\"{context_str}\\n\"\n",
    "\"---------------------\\n\"\n",
    "\"Given the context information above I want you to think step by step to answer the query in a crisp manner, incase case you don't know the answer say 'I don't know!'.\\n\"\n",
    "\"Query: {query_str}\\n\"\n",
    "\"Answer: \"\n",
    ")\n",
    "qa_prompt_tmpl = PromptTemplate(qa_prompt_tmpl_str)\n",
    "\n",
    "query_engine.update_prompts(\n",
    "    {\"response_synthesizer:text_qa_template\": qa_prompt_tmpl}\n",
    ")\n",
    "\n",
    "# Generate the response\n",
    "response = query_engine.query(\"What exactly is DSPy?\",)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/markdown": [
       "DSPy stands for \"Deep Semantic Prompting and Parameterized Yield\". It is a programming model developed by Stanford Natural Language Processing Group that translates prompting techniques into parameterized declarative modules, which can be used to build complex natural language processing (NLP) systems. Specifically, DSPy allows users to define natural language signatures, or prompts, using a shorthand notation, and then uses these signatures to abstract and automate the task of prompting large language models, such as those used in transformer-based architectures like GPT-3.5."
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "display(Markdown(str(response)))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.15"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
